1 Podsumowanie analizy

TODO

2 Biblioteki

library(dplyr)
library(ggplot2)
library(tidyr)
library(plotly)
library(knitr)

3 Wczytanie danych

# Read the battery dataset from the CSV file in the working directory.
data <- read.csv(file = "mp_batteries.csv")

# Columns that hold text values.
string_columns <- c(
  "Battery.Formula",
  "Working.Ion",
  "Formula.Charge",
  "Formula.Discharge"
)

# Every column that is neither a text column nor Battery.ID is treated as
# numeric in the rest of the analysis.
columns <- colnames(data)
numeric_columns <- setdiff(columns, c(string_columns, "Battery.ID"))

4 Podstawowe statystyki

Liczba wierszy: 4351.

Podsumowanie:

# Render the per-column summary() of the dataset as a table.
data %>%
  summary() %>%
  knitr::kable()
Battery.ID Battery.Formula Working.Ion Formula.Charge Formula.Discharge Max.Delta.Volume Average.Voltage Gravimetric.Capacity Volumetric.Capacity Gravimetric.Energy Volumetric.Energy Atomic.Fraction.Charge Atomic.Fraction.Discharge Stability.Charge Stability.Discharge Steps Max.Voltage.Step
Length:4351 Length:4351 Length:4351 Length:4351 Length:4351 Min. : 0.00002 Min. :-7.755 Min. : 5.176 Min. : 24.08 Min. :-583.5 Min. :-2208.1 Min. :0.00000 Min. :0.007407 Min. :0.00000 Min. :0.00000 Min. :1.000 Min. : 0.0000
Class :character Class :character Class :character Class :character Class :character 1st Qu.: 0.01747 1st Qu.: 2.226 1st Qu.: 88.108 1st Qu.: 311.62 1st Qu.: 211.7 1st Qu.: 821.6 1st Qu.:0.00000 1st Qu.:0.086957 1st Qu.:0.03301 1st Qu.:0.01952 1st Qu.:1.000 1st Qu.: 0.0000
Mode :character Mode :character Mode :character Mode :character Mode :character Median : 0.04203 Median : 3.301 Median : 130.691 Median : 507.03 Median : 401.8 Median : 1463.8 Median :0.00000 Median :0.142857 Median :0.07319 Median :0.04878 Median :1.000 Median : 0.0000
NA NA NA NA NA Mean : 0.37531 Mean : 3.083 Mean : 158.291 Mean : 610.62 Mean : 444.1 Mean : 1664.0 Mean :0.03986 Mean :0.159077 Mean :0.14257 Mean :0.12207 Mean :1.167 Mean : 0.1503
NA NA NA NA NA 3rd Qu.: 0.08595 3rd Qu.: 4.019 3rd Qu.: 187.600 3rd Qu.: 722.75 3rd Qu.: 614.4 3rd Qu.: 2252.3 3rd Qu.:0.04762 3rd Qu.:0.200000 3rd Qu.:0.13160 3rd Qu.:0.09299 3rd Qu.:1.000 3rd Qu.: 0.0000
NA NA NA NA NA Max. :293.19322 Max. :54.569 Max. :2557.627 Max. :7619.19 Max. :5926.9 Max. :18305.9 Max. :0.90909 Max. :0.993333 Max. :6.48710 Max. :6.27781 Max. :6.000 Max. :26.9607

5 Analiza wartości atrybutów

5.1 Atrybuty tekstowe

5.1.1 10 najliczniej występujących wartości dla każdego atrybutu tekstowego

# For every text column (plus Battery.ID) count how often each distinct value
# occurs, print the ten most frequent values as a table, and collect the full
# per-column counts in `counts` (columns: Value, Count, var).
count_columns <- c(string_columns, "Battery.ID")

# Preallocate one slot per column instead of growing a data frame with rbind()
# on every iteration.
counts_per_column <- vector("list", length(count_columns))

for (i in seq_along(count_columns)) {
  col <- count_columns[[i]]

  # `.data[[col]]` looks the column up by its string name and lets us name the
  # result column directly, instead of renaming the auto-generated "get(col)".
  col_counts <- data %>%
    count(Value = .data[[col]], name = "Count") %>%
    arrange(desc(Count))
  col_counts$var <- col
  counts_per_column[[i]] <- col_counts

  top_10_counts <- col_counts %>%
    select(all_of(c("Value", "Count"))) %>%
    slice_head(n = 10)
  print(knitr::kable(top_10_counts, caption = paste("10 najliczniej występujących wartości zmiennej", col)))
  cat("\n")
}

# Stack the per-column results into a single data frame in one step.
counts <- bind_rows(counts_per_column)
10 najliczniej występujących wartości zmiennej Battery.Formula
Value Count
Li0-1V2OF5 19
Li0-1CoPO4 18
Li0-1FePO4 18
Li0-3MnFeCo(PO4)3 17
Li0-1MnPO4 15
Li0-1V4OF11 15
Li0-1V4O5F7 12
Li0-1VF5 12
Li0-1CrP2O7 11
Li0-2MnP2O7 11
10 najliczniej występujących wartości zmiennej Working.Ion
Value Count
Li 2440
Ca 435
Mg 423
Zn 366
Na 309
K 107
Al 95
Y 93
Rb 50
Cs 33
10 najliczniej występujących wartości zmiennej Formula.Charge
Value Count
MnO2 49
TiO2 47
VO2 46
CrO2 45
CoO2 43
NiO2 41
FeO2 36
FePO4 26
WO2 25
CoPO4 24
10 najliczniej występujących wartości zmiennej Formula.Discharge
Value Count
LiCoPO4 19
LiFePO4 19
LiMnPO4 19
LiV2OF5 19
Li5Mn6(BO3)6 18
Li3MnFeCo(PO4)3 17
LiV4OF11 15
Li2MnP2O7 14
Li2FeSiO4 13
LiCrPO4 12
10 najliczniej występujących wartości zmiennej Battery.ID
Value Count
mp-1001925_Mg 1
mp-1003319_Ca 1
mp-10033_Cs 1
mp-10033_Rb 1
mp-1008911_Li 1
mp-1009555_Li 1
mp-1009747_Li 1
mp-1009747_Na 1
mp-1012668_Li 1
mp-1012678_Na 1

5.1.2 Liczba wystąpień wartości

# Histogram of occurrence counts, one facet per column in `counts$var`:
# the x axis is how many times a value occurs, the y axis is how many distinct
# values occur that many times.
ggplot(data = counts, mapping = aes(x = Count)) +
  geom_histogram(binwidth = 1, fill = "green", alpha = 0.7) +
  facet_wrap(vars(var), scales = "free") +
  labs(
    title = "Liczba wystąpień wartości dla zmiennej",
    x = "Liczba wystąpień",
    y = "Liczba różnych wartości"
  ) +
  theme_minimal()

5.1.3 Wnioski

Battery.ID to identyfikator baterii. Jest on unikalny w zbiorze. Zmienne Battery.Formula, Formula.Charge i Formula.Discharge cechują się dużą liczbą różnych wartości. Oznacza to, że testowane jest wiele różnych możliwych substancji, które mogą zostać użyte do produkcji baterii. Inaczej jest w przypadku zmiennej Working.Ion. Ponad połowa materiałów jako głównego jonu używa litu. Wydaje się to być dominującym trendem w badaniach nad bateriami.

5.2 Atrybuty liczbowe

Wartości puste:

# Count missing values in every numeric column and print them as a table.
#
# is.na() is used instead of is.nan(): is.nan() is FALSE for NA, which is how
# read.csv() represents empty numeric cells, so is.nan() would miss them.
# is.na() is TRUE for both NA and NaN. The variable and column names are kept
# so the printed table layout is unchanged.
nan_counts <- vapply(
  numeric_columns,
  function(col) sum(is.na(data[[col]])),
  integer(1)
)

nan_counts_df <- data.frame(
  nan = nan_counts
)

print(knitr::kable(nan_counts_df))
nan
Max.Delta.Volume 0
Average.Voltage 0
Gravimetric.Capacity 0
Volumetric.Capacity 0
Gravimetric.Energy 0
Volumetric.Energy 0
Atomic.Fraction.Charge 0
Atomic.Fraction.Discharge 0
Stability.Charge 0
Stability.Discharge 0
Steps 0
Max.Voltage.Step 0

Rozkłady wartości:

# Reshape the numeric columns into long form (columns `name` and `value`) so
# that each variable can be drawn in its own facet.
numeric_df <- data[, numeric_columns]
numeric_df_long <- as.data.frame(
  pivot_longer(numeric_df, cols = everything())
)

# One histogram per numeric variable, each with its own axis scales.
ggplot(data = numeric_df_long, mapping = aes(x = value)) +
  geom_histogram(fill = "green", alpha = 0.7) +
  facet_wrap(vars(name), scales = "free") +
  theme_minimal()

6 Korelacja

# Correlation matrix of all numeric columns, flattened to one row per ordered
# pair of variables with columns x, y and cor.
numeric_df <- data[, numeric_columns]
correlation_matrix <- cor(numeric_df)
correlation_df <- setNames(
  as.data.frame(as.table(correlation_matrix)),
  c("x", "y", "cor")
)

# Keep each unordered pair once (and drop the diagonal) by retaining only the
# rows where the name in x sorts before the name in y.
x_before_y <- with(correlation_df, as.character(x) < as.character(y))
correlation_df_one_dir <- correlation_df[x_before_y, ]

Korelacja wszystkich par zmiennych numerycznych

# Table of all variable pairs, strongest absolute correlation first.
strongest_first <- with(correlation_df_one_dir, order(-abs(cor)))
knitr::kable(correlation_df_one_dir[strongest_first, ])
x y cor
65 Gravimetric.Energy Volumetric.Energy 0.9283253
39 Gravimetric.Capacity Volumetric.Capacity 0.8584163
117 Stability.Charge Stability.Discharge 0.8028701
32 Atomic.Fraction.Discharge Gravimetric.Capacity 0.6807716
50 Average.Voltage Gravimetric.Energy 0.6656523
44 Atomic.Fraction.Discharge Volumetric.Capacity 0.6180186
91 Atomic.Fraction.Charge Atomic.Fraction.Discharge 0.5974157
62 Average.Voltage Volumetric.Energy 0.5545191
132 Max.Voltage.Step Steps 0.5352539
3 Gravimetric.Capacity Max.Delta.Volume 0.4337733
137 Gravimetric.Energy Max.Voltage.Step 0.3292322
64 Volumetric.Capacity Volumetric.Energy 0.3257482
125 Gravimetric.Energy Steps 0.2946075
8 Atomic.Fraction.Discharge Max.Delta.Volume 0.2906921
72 Max.Voltage.Step Volumetric.Energy 0.2526625
37 Max.Delta.Volume Volumetric.Capacity 0.2424769
71 Steps Volumetric.Energy 0.2381420
63 Gravimetric.Capacity Volumetric.Energy 0.2304216
51 Gravimetric.Capacity Gravimetric.Energy 0.2132463
38 Average.Voltage Volumetric.Capacity -0.2128178
41 Gravimetric.Energy Volumetric.Capacity 0.2098406
69 Stability.Charge Volumetric.Energy 0.1783271
20 Atomic.Fraction.Discharge Average.Voltage -0.1716903
101 Gravimetric.Energy Stability.Charge 0.1669819
98 Average.Voltage Stability.Charge 0.1661371
128 Atomic.Fraction.Discharge Steps 0.1641713
67 Atomic.Fraction.Charge Volumetric.Energy -0.1473523
26 Average.Voltage Gravimetric.Capacity -0.1462222
123 Gravimetric.Capacity Steps 0.1333977
31 Atomic.Fraction.Charge Gravimetric.Capacity 0.1289210
110 Average.Voltage Stability.Discharge -0.1284568
134 Average.Voltage Max.Voltage.Step 0.1271208
47 Steps Volumetric.Capacity 0.1037051
140 Atomic.Fraction.Discharge Max.Voltage.Step 0.1019796
45 Stability.Charge Volumetric.Capacity 0.1015305
55 Atomic.Fraction.Charge Gravimetric.Energy -0.0972924
135 Gravimetric.Capacity Max.Voltage.Step 0.0951906
108 Max.Voltage.Step Stability.Charge 0.0940466
2 Average.Voltage Max.Delta.Volume -0.0823707
113 Gravimetric.Energy Stability.Discharge -0.0782609
56 Atomic.Fraction.Discharge Gravimetric.Energy 0.0645248
99 Gravimetric.Capacity Stability.Charge 0.0633871
130 Stability.Discharge Steps -0.0631686
122 Average.Voltage Steps 0.0627851
48 Max.Voltage.Step Volumetric.Capacity 0.0626085
68 Atomic.Fraction.Discharge Volumetric.Energy 0.0610586
5 Gravimetric.Energy Max.Delta.Volume -0.0609858
70 Stability.Discharge Volumetric.Energy -0.0599949
61 Max.Delta.Volume Volumetric.Energy -0.0588321
115 Atomic.Fraction.Charge Stability.Discharge -0.0523971
19 Atomic.Fraction.Charge Average.Voltage -0.0385556
129 Stability.Charge Steps -0.0374860
97 Max.Delta.Volume Stability.Charge 0.0337587
104 Atomic.Fraction.Discharge Stability.Charge 0.0324051
46 Stability.Discharge Volumetric.Capacity 0.0317012
127 Atomic.Fraction.Charge Steps 0.0297369
103 Atomic.Fraction.Charge Stability.Charge -0.0273571
7 Atomic.Fraction.Charge Max.Delta.Volume 0.0213153
120 Max.Voltage.Step Stability.Discharge -0.0165552
116 Atomic.Fraction.Discharge Stability.Discharge 0.0143204
121 Max.Delta.Volume Steps -0.0132582
111 Gravimetric.Capacity Stability.Discharge 0.0125390
133 Max.Delta.Volume Max.Voltage.Step -0.0099251
109 Max.Delta.Volume Stability.Discharge 0.0077357
139 Atomic.Fraction.Charge Max.Voltage.Step 0.0053420
43 Atomic.Fraction.Charge Volumetric.Capacity 0.0012456
# Heatmap of the absolute correlation between every pair of numeric variables.
# `text` is not drawn by ggplot2 itself; it is picked up by ggplotly() below
# and shown as the hover tooltip.
p <- ggplot(correlation_df) +
  geom_tile(aes(x = x, y = y, fill = abs(cor), text = paste("Korelacja pomiędzy", x, "i", y, "=", abs(cor)))) +
  labs(fill = "Korelacja") +
  scale_fill_gradient(low = "white", high = "green") +
  # theme_minimal() is a complete theme and replaces any theme settings added
  # before it, so the axis-title tweak has to come after it to take effect.
  theme_minimal() +
  theme(axis.title = element_blank())

# Convert to an interactive plotly widget, tilt the x tick labels and blank
# both axis titles.
ggplotly(p, tooltip = "text") %>%
  layout(
    xaxis = list(
      tickangle = 45,
      title = ""
    ),
    yaxis = list(
      title = ""
    )
  )

Przedstawienie zależności 5 par zmiennych o najwyższej korelacji

# The five variable pairs with the largest absolute correlation, as a table.
top_5_correlation <- correlation_df_one_dir %>%
  arrange(desc(abs(cor))) %>%
  slice_head(n = 5)

knitr::kable(top_5_correlation)
x y cor
Gravimetric.Energy Volumetric.Energy 0.9283253
Gravimetric.Capacity Volumetric.Capacity 0.8584163
Stability.Charge Stability.Discharge 0.8028701
Atomic.Fraction.Discharge Gravimetric.Capacity 0.6807716
Average.Voltage Gravimetric.Energy 0.6656523
# Interactive scatter plot of Gravimetric.Energy against Volumetric.Energy with
# a linear-model trend line. The `text` aesthetic is consumed by ggplotly() as
# the hover tooltip (battery ID and both plotted values).
ggplotly(
  ggplot(data, aes(x = Gravimetric.Energy, y = Volumetric.Energy)) +
    geom_point(aes(
      text = paste(
        "ID baterii:", Battery.ID,
        "\nGravimetric.Energy:", Gravimetric.Energy,
        # Trailing colon added so this label matches the other tooltip labels.
        "\nVolumetric.Energy:", Volumetric.Energy
      )
    )) +
    geom_smooth(method = "lm") +
    labs(title = "Gravimetric.Energy i Volumetric.Energy") +
    theme_minimal(),
  tooltip = "text"
)
# Interactive scatter plot of Gravimetric.Capacity against Volumetric.Capacity
# with a linear-model trend line; hovering a point shows the battery ID and
# both plotted values.
capacity_scatter <- ggplot(
  data,
  aes(x = Gravimetric.Capacity, y = Volumetric.Capacity)
) +
  geom_point(
    aes(
      text = paste(
        "ID baterii:", Battery.ID,
        "\nGravimetric.Capacity:", Gravimetric.Capacity,
        "\nVolumetric.Capacity:", Volumetric.Capacity
      )
    )
  ) +
  geom_smooth(method = "lm") +
  labs(title = "Gravimetric.Capacity i Volumetric.Capacity") +
  theme_minimal()

ggplotly(capacity_scatter, tooltip = "text")
# Interactive scatter plot of Stability.Charge against Stability.Discharge
# with a linear-model trend line; hovering a point shows the battery ID and
# both plotted values.
stability_scatter <- ggplot(
  data,
  aes(x = Stability.Charge, y = Stability.Discharge)
) +
  geom_point(
    aes(
      text = paste(
        "ID baterii:", Battery.ID,
        "\nStability.Charge:", Stability.Charge,
        "\nStability.Discharge:", Stability.Discharge
      )
    )
  ) +
  geom_smooth(method = "lm") +
  labs(title = "Stability.Charge i Stability.Discharge") +
  theme_minimal()

ggplotly(stability_scatter, tooltip = "text")
# Interactive scatter plot of Atomic.Fraction.Discharge against
# Gravimetric.Capacity with a linear-model trend line; hovering a point shows
# the battery ID and both plotted values.
fraction_capacity_scatter <- ggplot(
  data,
  aes(x = Atomic.Fraction.Discharge, y = Gravimetric.Capacity)
) +
  geom_point(
    aes(
      text = paste(
        "ID baterii:", Battery.ID,
        "\nAtomic.Fraction.Discharge:", Atomic.Fraction.Discharge,
        "\nGravimetric.Capacity:", Gravimetric.Capacity
      )
    )
  ) +
  geom_smooth(method = "lm") +
  labs(title = "Atomic.Fraction.Discharge i Gravimetric.Capacity") +
  theme_minimal()

ggplotly(fraction_capacity_scatter, tooltip = "text")
# Interactive scatter plot of Average.Voltage against Gravimetric.Energy with
# a linear-model trend line; hovering a point shows the battery ID and both
# plotted values.
voltage_energy_scatter <- ggplot(
  data,
  aes(x = Average.Voltage, y = Gravimetric.Energy)
) +
  geom_point(
    aes(
      text = paste(
        "ID baterii:", Battery.ID,
        "\nAverage.Voltage:", Average.Voltage,
        "\nGravimetric.Energy:", Gravimetric.Energy
      )
    )
  ) +
  geom_smooth(method = "lm") +
  labs(title = "Average.Voltage i Gravimetric.Energy") +
  theme_minimal()

ggplotly(voltage_energy_scatter, tooltip = "text")

7 Najważniejsze trendy w badaniu

8 Predykcja dalszych cech